/*
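This do-file estimates treatment effects of PPP on firm-level wage growth
(step 1) and employment growth (step 2), both measured relative to 2018,
using the csdid difference-in-differences estimator.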
*/

// Load project-wide globals. ${dodir}, $logdir, ${datadir} and $outdir are
// used throughout this file and are presumably defined there -- TODO confirm.
do "${dodir}/make_globals.do"

// Install the estimation packages only when they are not already on the
// adopath. Re-installing on every run made the script fail without network
// access and could silently change the estimator version between runs.
foreach pkg in drdid csdid {
	capture which `pkg'
	if _rc {
		ssc install `pkg', all replace
	}
}

// Close any log left open by an earlier (possibly aborted) run; -log using-
// stops with an error when a log is already open. The path is quoted so that
// a log directory containing spaces still works.
capture log close
log using "$logdir/noppp_regs.log", replace

////////////////////////////////////////////////////////////////////////////////
// step 1: wage growth regs
////////////////////////////////////////////////////////////////////////////////

// Load only the variables needed for the wage-growth analysis.
local keepvars id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode ///
	st_mode med_wage total_compensation num_employees g
use `keepvars' using ${datadir}/reg_data, clear

// Drop rows whose minimum headcount is 0 or 1 in 2018, 2019 or 2020q1.
drop if inlist(min_emp_2018, 0, 1) | inlist(min_emp_2019, 0, 1) | inlist(min_emp_2020q1, 0, 1)

// Drop rows without a usable two-digit NAICS code (coded 0 or system missing).
drop if naics_2_mode == 0 | naics_2_mode == .

// Drop rows without a usable state code (coded 0 or system missing).
drop if st_mode == 0 | st_mode == .

// Exclude NAICS sectors 92, 22, 72, 49 and 55. The original note names
// restaurants (special covid treatment), public admin and utilities; it does
// not say why 49 and 55 are excluded as well -- TODO confirm.
drop if inlist(naics_2_mode, 92, 22, 72, 49, 55)

// Firm-level mean headcount over quarters 5-8 (labelled 2019 in the original
// notes), stored on every row of the firm.
tempvar emp19
generate `emp19' = num_employees if inrange(q, 5, 8)
egen mean_emp_2019 = mean(`emp19'), by(id)
drop `emp19'

// Show the distribution of the cohort variable g with one row per id. Nothing
// is dropped here; the data in memory are restored afterwards.
preserve

	gcollapse (lastnm) g, by(id)

	tabulate g

restore

// Recode a missing cohort to 0, presumably so that -csdid- treats those firms
// as never treated -- TODO confirm against the csdid help file.
replace g = 0 if g == .



// Clean the compensation variables: a missing amount is treated as zero.
replace med_wage=0 if med_wage==.
replace total_compensation=0 if total_compensation==.

// From here on med_wage holds the larger of med_wage and total_compensation.
replace med_wage=max(med_wage, total_compensation)

// Compensation per employee (missing when num_employees is 0 or missing).
gen mean_comp=med_wage/num_employees

// Cap compensation at 100,000 per employee. mean_comp itself is not updated
// after the cap.
replace med_wage=100000*num_employees if mean_comp>100000 & mean_comp~=.


// Size bin based on min_emp_2018:
//   0: <10   1: 10-49   2: 50-99   3: 100-249   4: 250+
// Stata orders missing values above every number, so an unguarded ">=250"
// would put firms with missing min_emp_2018 into the largest bin. The last
// line excludes them explicitly; their firm_size_bin stays missing.
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250 & !missing(min_emp_2018)

	
// Declare the id-by-q panel so that lag operators can be used below.
tsset id q

// comp_2018: med_wage of the same id 4, 8, 12 or 16 quarters earlier, with the
// lag chosen from the range of q so that it points back into quarters 1-4
// (for q up to 20). Left missing for q <= 4.
generate comp_2018 = L8.med_wage if inrange(q, 9, 12)
replace comp_2018 = L4.med_wage if inrange(q, 5, 8)
replace comp_2018 = L12.med_wage if inrange(q, 13, 16)
replace comp_2018 = L16.med_wage if q > 16

// comp_2019: the analogous lag back into quarters 5-8. It is not referenced
// again in this file.
generate comp_2019 = L4.med_wage if inrange(q, 9, 12)
replace comp_2019 = L8.med_wage if inrange(q, 13, 16)
replace comp_2019 = L12.med_wage if q > 16

// Outcome: compensation relative to the matching base-year quarter, defined
// only after the base year (q > 4).
generate y = med_wage / comp_2018 if q > 4

// Cap the outcome at 10, leaving system-missing values untouched.
replace y = 10 if !(y <= 10 | y == .)

// Remove every row of an id that has a missing outcome in any quarter after
// the base year.
egen byte drop_firm = max(y == . & q > 4), by(id)
drop if drop_firm == 1
drop drop_firm

// Demean the outcome within cells, where a cell is one combination of
// naics_2_mode, st_mode and firm_size_bin.
//
// NOTE(review): the collapsed file saved below contains a variable named id
// (the cell number), and the data in memory also contain id. Because the merge
// uses -update replace-, nonmissing values from the using file overwrite the
// values in memory, so after the merge id appears to hold the cell number
// rather than the id loaded from reg_data. The later by(id) collapse and
// csdid ..., i(id) would then run on cell numbers. Confirm this is intended.
preserve

	// one row per cell with the cell mean of y
	gcollapse (mean) y_m=y, by(naics_2_mode st_mode firm_size_bin)
	
	// number the cells 1..N in the collapsed data
	g id=_n
	
	save $datadir/y_m, replace

restore

// attach the cell mean (and, see note above, the cell number) to every row
merge m:1 naics_2_mode st_mode firm_size_bin using $datadir/y_m, update replace nogen

// outcome net of its cell mean
g y_dm=y-y_m

// Order the data by id and cohort; the (lastnm) collapse below takes the last
// nonmissing value within each id.
gsort id g

// Descriptive statistics by cohort on a one-row-per-id copy of the data. The
// data in memory are restored afterwards.
preserve

	gcollapse (lastnm) g min_emp_2018 comp_2018, by(id)

	tabulate g

	// base-year compensation per employee
	generate mean_comp = comp_2018 / min_emp_2018

	// first the means, then the medians, each split by cohort
	foreach stat in mean p50 {
		tabstat min_emp_2018 comp_2018 mean_comp, statistics(`stat') by(g)
	}

restore
	
	

// Callaway-Sant'Anna DiD on the demeaned wage-growth outcome, one pass per
// value of xx. Only xx = 5 is run at present; the other branch is kept so the
// range can be widened to individual size bins.
forval xx = 5/5 {

	// xx == 5 selects rows with mean_emp_2019 < 600; any other value selects
	// a single firm_size_bin.
	local sample "mean_emp_2019<600"
	if `xx' != 5 {
		local sample "firm_size_bin==`xx'"
	}

	// wboot requests wild-bootstrap inference, which relies on random draws.
	// Fix the seed so the standard errors and confidence bounds saved below
	// are the same on every run.
	set seed 20200403
	csdid y_dm if `sample', i(id) t(q) g(g) method(dripw) wboot

	// Overall ATT: keep the first six rows of r(table), each tagged with a
	// row label (labels unchanged from the files written by earlier versions).
	estat simple, estore(bin`xx'_att)
	preserve
	clear
	svmat double r(table), names(col)
	gen stat = word("coeff sd z prob bot_95 top_95", _n)
	drop if _n > 6
	gen firm_size_bin = `xx'
	save "${datadir}/reg_tables/reg_att_growth_noppp_bin`xx'", replace
	restore

	// Event-study estimates: same layout, with the overall ATT merged on by
	// row label.
	estat event

	preserve
	clear
	svmat double r(table), names(col)
	gen firm_size_bin = `xx'
	gen stat = word("coeff sd z prob bot_95 top_95", _n)
	drop if _n > 6
	merge 1:1 stat using "${datadir}/reg_tables/reg_att_growth_noppp_bin`xx'", nogen

	save "${datadir}/reg_tables/reg_table_growth_noppp_bin`xx'", replace
	restore

	// Run the event aggregation again right before plotting; presumably the
	// commands above overwrite the results csdid_plot reads -- TODO confirm.
	estat event

	csdid_plot, graphregion(color(white)) bgcolor(white) ///
	ylabel(,format(%9.2fc)) ///
	ytitle("Growth in Wages (Base Year: 2018)") ///
	xtitle("Quarters to Treatment") xscale(r(-8 6)) xlabel(-8(2)6,format(%9.0fc))
	// path quoted so an output directory containing spaces still works
	graph export "$outdir/comp_growth_noppp_bin`xx'.png", replace width(3000)
}


////////////////////////////////////////////////////////////////////////////////
// step 2: emp growth regs
////////////////////////////////////////////////////////////////////////////////

// Reload the regression data from disk for the employment-growth analysis.
local keepvars id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode ///
	st_mode med_wage total_compensation num_employees g
use `keepvars' using ${datadir}/reg_data, clear

// Drop rows whose minimum headcount is 0 or 1 in 2018, 2019 or 2020q1.
drop if inlist(min_emp_2018, 0, 1) | inlist(min_emp_2019, 0, 1) | inlist(min_emp_2020q1, 0, 1)

// Drop rows without a usable two-digit NAICS code (coded 0 or system missing).
drop if naics_2_mode == 0 | naics_2_mode == .

// Drop rows without a usable state code (coded 0 or system missing).
drop if st_mode == 0 | st_mode == .

// Recode a missing cohort to 0, presumably so that -csdid- treats those firms
// as never treated -- TODO confirm against the csdid help file.
replace g = 0 if g == .

// Exclude NAICS sectors 92, 22, 72, 49 and 55. The original note names
// restaurants (special covid treatment), public admin and utilities; it does
// not say why 49 and 55 are excluded as well -- TODO confirm.
drop if inlist(naics_2_mode, 92, 22, 72, 49, 55)

// Firm-level mean headcount over quarters 5-8 (labelled 2019 in the original
// notes), stored on every row of the firm.
tempvar emp19
generate `emp19' = num_employees if inrange(q, 5, 8)
egen mean_emp_2019 = mean(`emp19'), by(id)
drop `emp19'

// Clean the compensation variables: a missing amount is treated as zero.
replace med_wage=0 if med_wage==.
replace total_compensation=0 if total_compensation==.

// From here on med_wage holds the larger of med_wage and total_compensation.
replace med_wage=max(med_wage, total_compensation)

// Compensation per employee (missing when num_employees is 0 or missing).
gen mean_comp=med_wage/num_employees

// Cap compensation at 100,000 per employee. mean_comp itself is not updated
// after the cap.
replace med_wage=100000*num_employees if mean_comp>100000 & mean_comp~=.


// Size bin based on min_emp_2018:
//   0: <10   1: 10-49   2: 50-99   3: 100-249   4: 250+
// Stata orders missing values above every number, so an unguarded ">=250"
// would put firms with missing min_emp_2018 into the largest bin. The last
// line excludes them explicitly; their firm_size_bin stays missing.
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250 & !missing(min_emp_2018)
	
// Outcome for this step: headcount relative to the headcount of the same id
// in the matching quarter of the base year (quarters 1-4).

// Declare the id-by-q panel so that lag operators can be used below.
tsset id q

// emp_2018: num_employees of the same id 4, 8, 12 or 16 quarters earlier, with
// the lag chosen from the range of q so that it points back into quarters 1-4
// (for q up to 20). Left missing for q <= 4.
generate emp_2018 = L8.num_employees if inrange(q, 9, 12)
replace emp_2018 = L4.num_employees if inrange(q, 5, 8)
replace emp_2018 = L12.num_employees if inrange(q, 13, 16)
replace emp_2018 = L16.num_employees if q > 16

// emp_2019: the analogous lag back into quarters 5-8. It is not referenced
// again in this file.
generate emp_2019 = L4.num_employees if inrange(q, 9, 12)
replace emp_2019 = L8.num_employees if inrange(q, 13, 16)
replace emp_2019 = L12.num_employees if q > 16

// Employment ratio, defined only after the base year (q > 4).
generate y = num_employees / emp_2018 if q > 4

// Cap the outcome at 10, leaving system-missing values untouched.
replace y = 10 if !(y <= 10 | y == .)

// Remove every row of an id that has a missing outcome in any quarter after
// the base year.
egen byte drop_firm = max(y == . & q > 4), by(id)
drop if drop_firm == 1
drop drop_firm

// Demean the outcome within cells, where a cell is one combination of
// naics_2_mode, st_mode and firm_size_bin. This overwrites the y_m file
// written by the wage-growth step.
//
// NOTE(review): the collapsed file saved below contains a variable named id
// (the cell number), and the data in memory also contain id. Because the merge
// uses -update replace-, nonmissing values from the using file overwrite the
// values in memory, so after the merge id appears to hold the cell number
// rather than the id loaded from reg_data. The csdid ..., i(id) call that
// follows would then run on cell numbers. Confirm this is intended.
preserve

	// one row per cell with the cell mean of y
	gcollapse (mean) y_m=y, by(naics_2_mode st_mode firm_size_bin)
	
	// number the cells 1..N in the collapsed data
	g id=_n
	
	save $datadir/y_m, replace

restore

// attach the cell mean (and, see note above, the cell number) to every row
merge m:1 naics_2_mode st_mode firm_size_bin using $datadir/y_m, update replace nogen

// outcome net of its cell mean
g y_dm=y-y_m

// Callaway-Sant'Anna DiD on the demeaned employment-growth outcome, one pass
// per value of xx. Only xx = 5 is run at present; the other branch is kept so
// the range can be widened to individual size bins.
forval xx = 5/5 {

	// xx == 5 selects rows with mean_emp_2019 < 600; any other value selects
	// a single firm_size_bin.
	local sample "mean_emp_2019<600"
	if `xx' != 5 {
		local sample "firm_size_bin==`xx'"
	}

	// wboot requests wild-bootstrap inference, which relies on random draws.
	// Fix the seed so the standard errors and confidence bounds saved below
	// are the same on every run.
	set seed 20200403
	csdid y_dm if `sample', i(id) t(q) g(g) method(dripw) wboot

	// Overall ATT: keep the first six rows of r(table), each tagged with a
	// row label (labels unchanged from the files written by earlier versions).
	estat simple, estore(bin`xx'_att)
	preserve
	clear
	svmat double r(table), names(col)
	gen stat = word("coeff sd z prob bot_95 top_95", _n)
	drop if _n > 6
	gen firm_size_bin = `xx'
	save "${datadir}/reg_tables/reg_emp_att_growth_noppp_bin`xx'", replace
	restore

	// Event-study estimates: same layout, with the overall ATT merged on by
	// row label.
	estat event

	preserve
	clear
	svmat double r(table), names(col)
	gen firm_size_bin = `xx'
	gen stat = word("coeff sd z prob bot_95 top_95", _n)
	drop if _n > 6
	merge 1:1 stat using "${datadir}/reg_tables/reg_emp_att_growth_noppp_bin`xx'", nogen

	save "${datadir}/reg_tables/reg_emp_table_growth_noppp_bin`xx'", replace
	restore

	// Run the event aggregation again right before plotting; presumably the
	// commands above overwrite the results csdid_plot reads -- TODO confirm.
	estat event

	csdid_plot, graphregion(color(white)) bgcolor(white) ///
	ylabel(,format(%9.2fc)) ///
	ytitle("Growth in Employees (Base Year: 2018)") ///
	xtitle("Quarters to Treatment") xscale(r(-8 6)) xlabel(-8(2)6,format(%9.0fc))
	// path quoted so an output directory containing spaces still works
	graph export "$outdir/emp_growth_noppp_bin`xx'.png", replace width(3000)
}

// Close the log opened at the top of the file.
log close
